package uk.bl.odin.orcid.htmlmeta;
import java.io.IOException;
import java.net.URL;
import java.util.concurrent.TimeUnit;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import uk.bl.odin.orcid.domain.IsOrcidWork;
import uk.bl.odin.orcid.domain.IsOrcidWorkProvider;
import uk.bl.odin.orcid.ethos.EthosMetaScraper;
import uk.bl.odin.orcid.ethos.ThesisMetadata;
/**
 * Work provider that picks a scraping strategy based on the identifier it is given.
 *
 * <p>Identifiers starting with {@code uk.bl.ethos} are handed to an
 * {@link EthosMetaScraper}. Anything else is treated as a URL: the page is downloaded
 * with jsoup, wrapped in an {@link HTMLMetaBuilder}, and the builder is kept in
 * {@link #cache} so that repeated lookups of the same URL do not hit the network again.
 */
public class DelegatingMetaScraper implements IsOrcidWorkProvider {

    /**
     * Builders for previously downloaded pages, keyed by the URL they were fetched from.
     * Holds at most 100 entries; each entry expires 30 minutes after it was written.
     */
    public static final Cache<String, HTMLMetaBuilder> cache = CacheBuilder.newBuilder()
            .expireAfterWrite(30, TimeUnit.MINUTES).maximumSize(100).build();

    /**
     * Fetches work metadata for the given identifier.
     *
     * @param url either an identifier starting with {@code uk.bl.ethos} or the URL of an
     *            HTML page to scrape; must not be {@code null}
     * @return the result of {@link EthosMetaScraper#fetch} for ethos identifiers,
     *         otherwise the result of {@code HTMLMetaBuilder.getDublinCoreMeta()} for the page
     * @throws IOException if the delegate scraper or the page download fails
     * @throws NullPointerException if {@code url} is {@code null}
     */
    @Override
    public IsOrcidWork fetch(String url) throws IOException {
        if (url == null) {
            throw new NullPointerException("url must not be null");
        }

        // check to see if we have an ethos ID
        if (url.startsWith("uk.bl.ethos")) {
            EthosMetaScraper scrape = new EthosMetaScraper();
            return scrape.fetch(url);
        }

        HTMLMetaBuilder builder = cache.getIfPresent(url);
        if (builder == null) {
            System.out.println("looking up "+url);
            // 10000 is the jsoup connect/read timeout (milliseconds per the jsoup API).
            Document doc = Jsoup.connect(url).timeout(10000).get();
            builder = new HTMLMetaBuilder(doc);
            // Remember the builder; without this the cache would never be populated and
            // every call would re-download the page. Concurrent misses for the same URL
            // may each download once; the last put simply wins.
            cache.put(url, builder);
        }
        return builder.getDublinCoreMeta();
    }
}